Files can be downloaded from:https://github.com/chikusquish/MHDprojectIE2022
Install packages for this session. Packages are as shown in: https://github.com/chikusquish/MHDprojectIE2022/blob/main/R-pseudocodes
Load libraries for this session
library(phyloseq)
library(ggplot2)
library(tidyverse)
library(ggpubr)
library(ggsci)
library(stringr)
library(dutchmasters)
library(RColorBrewer)
load("STH-IE_visualisation.RData")
phylo_obj@sam_data[1:3,1:3]
## SampleID number project
## 272IE 272IE 1 PCIE
## 273IE 273IE 2 PCIE
## 290IE 290IE 3 PCIE
pd <- psmelt(phylo_obj)
Figure 3.7a
my_comparisons <- list( c("Definite_IE", "Healthy"), c("Definite_IE", "Highrisk_IE"), c("Healthy", "Highrisk_IE") )
boxplot_shannon<-ggplot(data = Adiv, aes(x = IE_dx, y = Shannon, fill=IE_dx)) +
geom_boxplot(width=0.5)+
theme_minimal()+
theme(legend.position = "none")+
stat_summary(fun.y="mean", geom="point", shape=23, size=3, fill="white")+
geom_jitter(shape=1, position=position_jitter(0.1))+
ggtitle("Shannon Index by Diagnosis") +
scale_x_discrete(name= "Diagnosis",
limits=c("Definite_IE", "Healthy", "Highrisk_IE"),
labels=c("Definite IE", "Healthy", "High risk IE"))+
scale_fill_manual(limits=c("Definite_IE", "Healthy", "Highrisk_IE"),
values=c("skyblue","orange","purple"))+
stat_compare_means(comparisons=my_comparisons, label.y=c(4.2, 4.6, 4.4))
boxplot_shannon
Figure 3.7b
boxplot_chao1<- ggplot(data = Adiv, aes(x = IE_dx, y = Chao1, fill=IE_dx)) +
geom_boxplot(width=0.5)+
theme_minimal()+
theme(legend.position = "none")+
stat_summary(fun.y="mean", geom="point", shape=23, size=3, fill="white")+
geom_jitter(shape=1, position=position_jitter(0.1))+
ggtitle("Chao1 Index by Diagnosis") +
scale_x_discrete(name= "Diagnosis",
limits=c("Definite_IE", "Healthy", "Highrisk_IE"),
labels=c("Definite IE", "Healthy", "High risk IE"))+
scale_fill_manual(limits=c("Definite_IE", "Healthy", "Highrisk_IE"),
values=c("skyblue","orange","purple"))+
stat_compare_means(comparisons=my_comparisons, label.y=c(160,180,170))
boxplot_chao1
Figure 3.7c
boxplot_invsimp<-ggplot(data = Adiv, aes(x = IE_dx, y = InvSimpson, fill=IE_dx)) +
geom_boxplot(width=0.5)+
theme_minimal()+
theme(legend.position = "none")+
stat_summary(fun.y="mean", geom="point", shape=23, size=3, fill="white")+
geom_jitter(shape=1, position=position_jitter(0.1))+
ggtitle("InvSimpson Index by Diagnosis") +
scale_x_discrete(name= "Diagnosis",
limits=c("Definite_IE", "Healthy", "Highrisk_IE"),
labels=c("Definite IE", "Healthy", "High risk IE"))+
scale_fill_manual(limits=c("Definite_IE", "Healthy", "Highrisk_IE"),
values=c("skyblue","orange","purple"))+
stat_compare_means(comparisons=my_comparisons, label.y=c(33, 38, 35))
boxplot_invsimp
Figure 3.7d
boxplot_SEI<-ggplot(data = Adiv, aes(x = IE_dx, y = Evenness, fill=IE_dx)) +
geom_boxplot(width=0.5)+
theme_minimal()+
theme(legend.position = "none")+
stat_summary(fun.y="mean", geom="point", shape=23, size=3, fill="white")+
geom_jitter(shape=1, position=position_jitter(0.1))+
ggtitle("Shannon Evenness Index by Diagnosis") +
scale_x_discrete(name= "Diagnosis",
limits=c("Definite_IE", "Healthy", "Highrisk_IE"),
labels=c("Definite IE", "Healthy", "High risk IE"))+
scale_fill_manual(limits=c("Definite_IE", "Healthy", "Highrisk_IE"),
values=c("skyblue","orange","purple"))+
stat_compare_means(comparisons=my_comparisons, label.y=c(0.84, 0.90, 0.87))
boxplot_SEI
Generate tools for beta diversity visualisation
library("phyloseq")
library("ggplot2")
library("dplyr")
library("ggpubr")
library("Matrix")
library("reshape2")
library("vegan")
phylo2=phylo_obj
relab_genera = transform_sample_counts(phylo2, function(x) x / sum(x) * 100)
head(otu_table(relab_genera)[,1:6])
sample_data(phylo2)$IE_dx<- factor((sample_data(phylo_obj)$IE_dx), levels=c("Definite_IE", "Healthy", "Highrisk_IE"))
abrel_bray <- phyloseq::distance(relab_genera, method = "bray")
abrel_bray <- as.matrix(abrel_bray)
head(abrel_bray)[,1:6]
sub_dist <- list()
relab_genera = transform_sample_counts(phylo2, function(x) x / sum(x) * 100) ### run again to ensure the loop works
groups_all <- sample_data(relab_genera)$IE_dx
##### below for-loop must all be highlighted to run otherwise df.bray shows error ######
for (group in levels(groups_all)) {
row_group <- which(groups_all == group)
sample_group <- sample_names(relab_genera)[row_group]
sub_dist[[group]] <- abrel_bray[sample_group, sample_group]
sub_dist[[group]][!lower.tri(sub_dist[[group]])] <- NA
}
braygroups<- melt(sub_dist)
df.bray <- braygroups[complete.cases(braygroups), ]
df.bray$L1 <- factor(df.bray$L1, levels=names(sub_dist))
head(df.bray)
Figure 3.7e Beta diversity box plot
df.bray$Diagnosis<-df.bray$L1
betdiv_boxplot1<-ggplot(df.bray, aes(x=Diagnosis, y=value, col=Diagnosis)) +
geom_jitter() +
geom_boxplot(alpha=0.6) +
ylab("Bray-Curtis diversity") +
xlab("Diagnosis")+
ggtitle("Bray-Curtis Diversity in Salivary Microbiome of \nHealthy Samples and Cardiac Patients")+
theme_minimal()+
theme(plot.title=element_text(hjust=0.5),
legend.position="none",
axis.text.x=element_text(hjust=0.5,vjust=1,size=10),
axis.text.y=element_text(size=10))+
scale_color_manual(limits=c("Definite_IE", "Healthy", "Highrisk_IE"),
values=c("skyblue", "orange", "purple"))+
scale_x_discrete(limits=c("Definite_IE", "Healthy", "Highrisk_IE"),
labels=c("Definite IE", "Healthy", "High risk IE"))+
stat_compare_means(method="wilcox.test",
comparisons = my_comparisons,
label.y = c(1.0, 1.10, 1.05))
betdiv_boxplot1
Figure 3.7f Beta diversity PCoA plot
ord = ordinate(relab_genera, method="PCoA", distance = "bray")
betdiv_pcoa<-plot_ordination(relab_genera, ord,
color = "IE_dx",
shape="IE_dx") +
geom_point(size=3) +
stat_ellipse(aes(group=IE_dx))+
theme_minimal()+
theme(legend.position = "top",
plot.title=element_text(hjust=0.5))+
scale_color_manual(limits=c("Definite_IE", "Healthy", "Highrisk_IE"),
values=c("skyblue", "orange", "purple"))+
scale_fill_manual(name="Diagnosis")+
ggtitle("PCoA Plot of Salivary Microbial Diversity in \nHealthy Samples and Cardiac Patients")
betdiv_pcoa
Generate heattree package
library("metacoder")
s=main
names(s)[names(s) == 'X272IE_S111_profile_IEcount.txt'] <- "272IE"
names(s)[names(s) == 'X273IE_S112_profile_IEcount.txt'] <- "273IE"
names(s)[names(s) == 'X290IE_S113_profile_IEcount.txt'] <- "290IE"
names(s)[names(s) == 'X293IE_S13_profile_IEcount.txt'] <- "293IE"
names(s)[names(s) == 'X294IE_S114_profile_IEcount.txt'] <- "294IE"
names(s)[names(s) == 'X295IE_S14_profile_IEcount.txt'] <- "295IE"
names(s)[names(s) == 'X296IE_S115_profile_IEcount.txt'] <- "296IE"
names(s)[names(s) == 'SRS013942_profile_count.txt'] <- "SRS013942"
names(s)[names(s) == 'SRS014468_profile_count.txt'] <- "SRS014468"
names(s)[names(s) == 'SRS014692_profile_count.txt'] <- "SRS014692"
names(s)[names(s) == 'SRS015055_profile_count.txt'] <- "SRS015055"
names(s)[names(s) == 'SRS019120_profile_count.txt'] <- "SRS019120"
names(s)[names(s) == 'SRS065518_profile_count.txt'] <- "SRS065518"
names(s)[names(s) == 'SRS104275_profile_count.txt'] <- "SRS104275"
names(s)[names(s) == 'SRS147126_profile_count.txt'] <- "SRS147126"
s=as.data.frame(s[,-which(names(s) %in% c("SRS015055", "SRS019120"))])
taxmapobj <- parse_tax_data(s,
class_cols = "X.clade_name", # the column that contains taxonomic information
class_sep = "|", # The character used to separate taxa in the classification
class_regex = "^(.+)__(.+)$", # Regex identifying where the data for each taxon is
class_key = c(tax_rank = "info", # A key describing each regex capture group
tax_name = "taxon_name"))
print(taxmapobj)
taxmapobj$data$tax_data <- zero_low_counts(taxmapobj, dataset = "tax_data", min_count = 5)
metadata2=Metadata
print(metadata2)
no_reads <- rowSums(taxmapobj$data$tax_data[, metadata2$SampleID]) == 0
sum(no_reads)
taxmapobj <- filter_obs(taxmapobj, target = "tax_data", ! no_reads, drop_taxa = TRUE)
print(taxmapobj)
taxmapobj$data$tax_abund <- calc_taxon_abund(taxmapobj, "tax_data",
cols = metadata2$SampleID)
print(taxmapobj)
taxmapobj$data$tax_occ <- calc_n_samples(taxmapobj, "tax_abund",
groups = metadata2$IE_dx,
cols = metadata2$SampleID)
set.seed(1) # This makes the plot appear the same each time it is run
Figure 3.8a
heattreeHHS<-heat_tree(taxmapobj,
node_label = taxon_names,
node_size = n_obs,
node_size_range = c(0.01,0.05),
edge_size_range = c(0.005, 0.005),
node_color = Healthy,
title = "Bacteria Species in Saliva of Healthy Human Samples",
title_size = 0.04,
node_size_axis_label = "OTU count",
node_color_axis_label = "Samples with reads",
layout = "davidson-harel", # The primary layout algorithm
initial_layout = "reingold-tilford")# The primary layout algorithm
heattreeHHS
Figure 3.8b
heattreeIE<-heat_tree(taxmapobj,
node_label = taxon_names,
node_size = n_obs,
node_size_range = c(0.01,0.05),
edge_size_range = c(0.005, 0.005),
node_color = Definite_IE,
title = "Bacteria Species in Saliva of Patients with Definite IE",
title_size = 0.04,
node_size_axis_label = "OTU count",
node_color_axis_label = "Samples with reads",
layout = "davidson-harel", # The primary layout algorithm
initial_layout = "reingold-tilford")# The primary layout algorithm
heattreeIE
Figure 3.8b
heattreehighriskIE<-heat_tree(taxmapobj,
node_label = taxon_names,
node_size = n_obs,
node_size_range = c(0.01,0.05),
edge_size_range = c(0.005, 0.005),
node_color = Highrisk_IE,
title = "Bacteria Species in Saliva of Patients with High Risk IE",
title_size = 0.04,
node_size_axis_label = "OTU count",
node_color_axis_label = "Samples with reads",
layout = "davidson-harel", # The primary layout algorithm
initial_layout = "reingold-tilford")# The primary layout algorithm
heattreehighriskIE
Figure 3.9a
abundance_phylum<-ggplot(data = pd, aes(x = Phylum, y = Abundance)) +
geom_bar(stat = 'identity', aes(fill = IE_dx),width = 0.5) +
theme_minimal()+
theme(plot.title=element_text(size=12),
legend.position="top",
legend.title = element_text(size=11),
legend.text=element_text(size=8),
legend.key.size=unit(0.5,"cm"),
axis.text = element_text(size = 12),
axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.5)) +
scale_fill_manual(name='Diagnosis',limits=c("Definite_IE","Healthy", "Highrisk_IE"),
labels=c("Definite IE","Healthy", "High risk IE"), values=c("skyblue", "orange", "purple"))+
ggtitle("Oral Microbial Abundance by Phylum") +
xlab("Phylum")+
ylab("Abundance")
abundance_phylum
Figure 3.9b
pd$IE_dx = factor(pd$IE_dx, levels=c("Healthy", "Definite_IE", "Highrisk_IE"))
abundance_genus<-ggplot(data = pd, aes(x = Genus, y = Abundance)) +
geom_bar(stat = 'identity', aes(fill = IE_dx),width = 0.5) +
theme_minimal()+
theme(plot.title=element_text(size=20),
legend.position="top",
legend.title = element_text(size=11),
legend.text=element_text(size=8),
legend.key.size=unit(0.5,"cm"),
axis.text = element_text(size = 12),
axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.5)) +
scale_fill_manual(name='Diagnosis',limits=c("Definite_IE","Healthy", "Highrisk_IE"),
labels=c("Definite IE","Healthy", "High risk IE"), values=c("skyblue", "orange", "purple"))+
ggtitle("Oral Microbial Abundance by Genus")+
xlab("Genus")+
ylab("Abundance")
abundance_genus
Figure 3.9c
relabundance_diagnosis1<-ggplot(data = pd, aes(x = Phylum, y = Abundance)) +
geom_bar(stat = 'identity', position= "fill", aes(fill = IE_dx),width = 0.5) +
theme_minimal()+
theme(plot.title=element_text(size=12),
legend.position="top",
legend.title = element_text(size=11),
legend.text=element_text(size=8),
legend.key.size=unit(0.5,"cm"),
axis.text = element_text(size = 12),
axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.5)) +
labs(x = "Clinical Diagnosis", y = "Relative abundance (%)")+
scale_fill_manual(name='Diagnosis',limits=c("Definite_IE","Healthy", "Highrisk_IE"),
labels=c("Definite IE","Healthy", "High risk IE"), values=c("skyblue", "orange", "purple"))+
ggtitle("Relative Abundance by Diagnosis (Phylum)") +
xlab("Phylum")+
ylab("Abundance")
relabundance_diagnosis1
Figure 3.9d
px<-pd
px$Genus[px$Genus==0]=NA
relabundance_diagnosis2<-ggplot(data = pd, aes(x = Genus, y = Abundance, na.rm=T)) +
geom_bar(stat = 'identity', position= "fill", aes(fill = IE_dx),width = 0.5) +
theme_minimal()+
theme(plot.title=element_text(size=20),
legend.position="top",
legend.title = element_text(size=11),
legend.text=element_text(size=8),
legend.key.size=unit(0.5,"cm"),
axis.text = element_text(size = 12),
axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.5)) +
labs(x = "Clinical Diagnosis", y = "Relative abundance (%)") +
scale_fill_manual(name="Diagnosis",limits=c("Definite_IE","Healthy", "Highrisk_IE"),
labels=c("Definite IE","Healthy", "High risk IE"), values=c("skyblue", "orange", "purple"))+
ggtitle("Relative Abundance by Diagnosis (Genus)") +
xlab("Genus")+
ylab("Abundance")
relabundance_diagnosis2
Figure 3.10a
heatmap_allspecies<-plot_heatmap(phylo_obj, method = "NMDS", distance = "bray",
sample.label = "SampleID", taxa.label = "Species",
taxa.order = "Species", na.value="white", low="white", high="red") +
theme(legend.position = "top", legend.text =element_text(size=8), legend.key.width = unit(1.5, "cm"))+
ggtitle("Species abundance across all samples (NMDS)")+
scale_x_discrete(limits=c("272IE","290IE", "294IE", "295IE","SRS014468", "SRS014692", "SRS013942", "SRS147126", "SRS104275", "SRS065518", "273IE", "293IE", "296IE"))
heatmap_allspecies
show most abundant OTU
sample_sums(phylo_obj)
median(sample_sums(phylo_obj))
total = median(sample_sums(phylo_obj))
abundant_OTU2 = phyloseq::filter_taxa(phylo_obj, function(x) sum(x>total*0.20)>0, TRUE)
abundant_OTU2
IE_abundant = subset_samples(abundant_OTU2, IE_dx=="Definite_IE")
HR_abundant = subset_samples(abundant_OTU2, IE_dx=="Highrisk_IE")
HHS_abundant = subset_samples(abundant_OTU2, IE_dx=="Healthy")
Figure 3.10b
IE_abundant_heatmap2 <- plot_heatmap(IE_abundant, method = "NMDS",
distance = "euclidean",
taxa.label = "Species",
taxa.order = "Species",
trans=NULL,
low="white",
high="skyblue",
na.value="white",
sample.label = "SampleID")+
ggtitle("Abundant species in \nDefinite IE samples(NMDS)")+
theme(plot.title=element_text(size=17),
legend.position = "top",
legend.text =element_text(size=8),
legend.key.width = unit(1.5, "cm"),
axis.text.x = element_text(angle = 90,
hjust = 1,
vjust=0.5),
axis.text.y=element_text(size=11))
IE_abundant_heatmap2
Figure 3.10c
HR_abundant_heatmap2<-plot_heatmap(HR_abundant, method = "NMDS",
distance = "euclidean",
taxa.label = "Species",
taxa.order = "Species",
trans=NULL,
low="white",
high="purple",
na.value="white",
sample.label = "SampleID")+
ggtitle("Abundant species in \nHigh Risk IE samples(NMDS)")+
theme(plot.title=element_text(size=17),
legend.position = "top",
legend.text =element_text(size=8),
legend.key.width = unit(1.5, "cm"),
axis.text.x = element_text(angle = 90,
hjust = 1,
vjust=0.5),
axis.text.y=element_text(size=11))
HR_abundant_heatmap2
Figure 3.10d
HHS_abundant_heatmap2<-plot_heatmap(HHS_abundant, method = "NMDS",
distance = "euclidean",
taxa.label = "Species",
taxa.order = "Species",
trans=NULL,
low="white",
high="orange",
na.value="white",
sample.label = "SampleID")+
ggtitle("Abundant species in \nHHS samples(NMDS)")+
theme(plot.title=element_text(size=17),
legend.position = "top",
legend.text =element_text(size=8),
legend.key.width = unit(1.5, "cm"),
axis.text.x = element_text(angle = 90,
hjust = 1,
vjust=0.5),
axis.text.y=element_text(size=11))
HHS_abundant_heatmap2
Figure 3.12a
HHS_netplot<-plot_net(HHS_abundant,
distance = "(A+B-2*J)/(A+B)",
type = "taxa",
maxdist = 0.5,
color="Phylum",
point_label="Species",
point_size = 7,
point_alpha=0.5,
laymeth="circle",
hjust=0.5,
rescale = 0.3)+
theme(legend.position = "none")+
ggtitle("Network plot for Salivary Microbial Species in HHS Samples")
HHS_netplot
Figure 3.12b
IE_netplot<-plot_net(IE_abundant,
distance = "(A+B-2*J)/(A+B)",
type = "taxa",
maxdist = 0.5,
color="Phylum",
point_label="Species",
point_size = 7,
point_alpha=0.5,
laymeth="circle",
hjust=0.5,
rescale = 0.3)+
theme(legend.position = "none")+
ggtitle("Network plot for Salivary Microbial Species in Definite IE Samples")
IE_netplot
Figure 3.12c
HR_netplot<-plot_net(HR_abundant,
distance = "(A+B-2*J)/(A+B)",
type = "taxa",
maxdist = 0.5,
color="Phylum",
point_label="Species",
point_size = 7,
point_alpha=0.5,
laymeth="circle",
hjust=0.5,
rescale = 0.3)+
theme(legend.position = "none")+
ggtitle("Network plot for Salivary Microbial Species in High risk IE Samples")
HR_netplot